{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "c928623b",
   "metadata": {},
   "source": [
    "# Maternity Services Data Set (MSDS)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0cf48345",
   "metadata": {},
   "source": [
    "This page is a work in progress."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "71405ccc",
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "\"None of [Index(['cohort', 'count'], dtype='object')] are in the [columns]\"",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mKeyError\u001b[39m                                  Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 5\u001b[39m\n\u001b[32m      3\u001b[39m sys.path.append(os.path.abspath(\u001b[33m'\u001b[39m\u001b[33m../../../../scripts/\u001b[39m\u001b[33m'\u001b[39m))\n\u001b[32m      4\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mdata_doc_helper\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m NHSEDataSet \u001b[38;5;28;01mas\u001b[39;00m DS, last_modified\n\u001b[32m----> \u001b[39m\u001b[32m5\u001b[39m ds = \u001b[43mDS\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mMSDS\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m      6\u001b[39m last_modified()\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\sn9395\\GitHub\\Guidebook---KE-edits\\ukllc_book\\docs\\scripts\\data_doc_helper.py:1000\u001b[39m, in \u001b[36mNHSEDataSet.__init__\u001b[39m\u001b[34m(self, dataset)\u001b[39m\n\u001b[32m    998\u001b[39m \u001b[38;5;28mself\u001b[39m.apa_cite, \u001b[38;5;28mself\u001b[39m.dl_cites = cites(\u001b[38;5;28mself\u001b[39m.doi)\n\u001b[32m    999\u001b[39m \u001b[38;5;28mself\u001b[39m.latest_v = get_latest_dsvs(\u001b[38;5;28mself\u001b[39m.dataset)\n\u001b[32m-> \u001b[39m\u001b[32m1000\u001b[39m \u001b[38;5;28mself\u001b[39m.participants = \u001b[43mcohort_total\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m)\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\sn9395\\GitHub\\Guidebook---KE-edits\\ukllc_book\\docs\\scripts\\data_doc_helper.py:989\u001b[39m, in \u001b[36mNHSEDataSet.__init__.<locals>.cohort_total\u001b[39m\u001b[34m(ds)\u001b[39m\n\u001b[32m    987\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mN/A - Dataset comprises of multiple auxiliary tables\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    988\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m989\u001b[39m     df = \u001b[43mmd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_nhse_cohort_counts\u001b[49m\u001b[43m(\u001b[49m\u001b[43mds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    990\u001b[39m     df[\u001b[33m\"\u001b[39m\u001b[33mcount\u001b[39m\u001b[33m\"\u001b[39m] = df[\u001b[33m\"\u001b[39m\u001b[33mcount\u001b[39m\u001b[33m\"\u001b[39m].apply(\u001b[38;5;28;01mlambda\u001b[39;00m x: \u001b[38;5;28mint\u001b[39m(x.replace(\u001b[33m\"\u001b[39m\u001b[33m<10\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m0\u001b[39m\u001b[33m\"\u001b[39m)))\n\u001b[32m    991\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mstr\u001b[39m(df[\u001b[33m\"\u001b[39m\u001b[33mcount\u001b[39m\u001b[33m\"\u001b[39m].sum())\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\sn9395\\GitHub\\Guidebook---KE-edits\\ukllc_book\\docs\\scripts\\mdapi_functions.py:24\u001b[39m, in \u001b[36mget_nhse_cohort_counts\u001b[39m\u001b[34m(ds)\u001b[39m\n\u001b[32m     22\u001b[39m data = r.text\n\u001b[32m     23\u001b[39m pj = json.loads(data)\n\u001b[32m---> \u001b[39m\u001b[32m24\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mpd\u001b[49m\u001b[43m.\u001b[49m\u001b[43mjson_normalize\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpj\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcohort\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcount\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m]\u001b[49m\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\sn9395\\.conda\\envs\\jupbook\\Lib\\site-packages\\pandas\\core\\frame.py:4108\u001b[39m, in \u001b[36mDataFrame.__getitem__\u001b[39m\u001b[34m(self, key)\u001b[39m\n\u001b[32m   4106\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n\u001b[32m   4107\u001b[39m         key = \u001b[38;5;28mlist\u001b[39m(key)\n\u001b[32m-> \u001b[39m\u001b[32m4108\u001b[39m     indexer = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_get_indexer_strict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcolumns\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[32m1\u001b[39m]\n\u001b[32m   4110\u001b[39m \u001b[38;5;66;03m# take() does not accept boolean indexers\u001b[39;00m\n\u001b[32m   4111\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(indexer, \u001b[33m\"\u001b[39m\u001b[33mdtype\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) == \u001b[38;5;28mbool\u001b[39m:\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\sn9395\\.conda\\envs\\jupbook\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:6200\u001b[39m, in \u001b[36mIndex._get_indexer_strict\u001b[39m\u001b[34m(self, key, axis_name)\u001b[39m\n\u001b[32m   6197\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m   6198\u001b[39m     keyarr, indexer, new_indexer = \u001b[38;5;28mself\u001b[39m._reindex_non_unique(keyarr)\n\u001b[32m-> \u001b[39m\u001b[32m6200\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_raise_if_missing\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkeyarr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   6202\u001b[39m keyarr = \u001b[38;5;28mself\u001b[39m.take(indexer)\n\u001b[32m   6203\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, Index):\n\u001b[32m   6204\u001b[39m     \u001b[38;5;66;03m# GH 42790 - Preserve name from an Index\u001b[39;00m\n",
      "\u001b[36mFile \u001b[39m\u001b[32mc:\\Users\\sn9395\\.conda\\envs\\jupbook\\Lib\\site-packages\\pandas\\core\\indexes\\base.py:6249\u001b[39m, in \u001b[36mIndex._raise_if_missing\u001b[39m\u001b[34m(self, key, indexer, axis_name)\u001b[39m\n\u001b[32m   6247\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m nmissing:\n\u001b[32m   6248\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m nmissing == \u001b[38;5;28mlen\u001b[39m(indexer):\n\u001b[32m-> \u001b[39m\u001b[32m6249\u001b[39m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mNone of [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m] are in the [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00maxis_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m]\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m   6251\u001b[39m     not_found = \u001b[38;5;28mlist\u001b[39m(ensure_index(key)[missing_mask.nonzero()[\u001b[32m0\u001b[39m]].unique())\n\u001b[32m   6252\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_found\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m not in index\u001b[39m\u001b[33m\"\u001b[39m)\n",
      "\u001b[31mKeyError\u001b[39m: \"None of [Index(['cohort', 'count'], dtype='object')] are in the [columns]\""
     ]
    }
   ],
   "source": [
    "import sys\n",
    "import os\n",
    "sys.path.append(os.path.abspath('../../../../scripts/'))\n",
    "from data_doc_helper import NHSEDataSet as DS, last_modified\n",
    "ds = DS(\"MSDS\")\n",
    "last_modified()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "91b35815",
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "ds.three_sec_summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7d719ca",
   "metadata": {},
   "source": [
    "## 1. Summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31f20bc2",
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "ds.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8c00184b",
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "ds.info_table()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ebf55707",
   "metadata": {},
   "source": [
    "## 2. Metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33994195",
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "ds.metrics()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2d7fecf8",
   "metadata": {},
   "source": [
    "## 3. Version History"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a176d142",
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "ds.version_history()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "12e6b815",
   "metadata": {},
   "source": [
    "## 4. Documentation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "02e29089",
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "ds.documentation()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ecce32ec",
   "metadata": {},
   "source": [
    "## 5. Useful Syntax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee22f894",
   "metadata": {
    "tags": [
     "remove-input"
    ]
   },
   "outputs": [],
   "source": [
    "ds.useful_syntax()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "jupbook",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}